Skip to content

Conversation

@aemerson
Copy link
Contributor

When the stars align to conspire against stack alignment, when we have
frame-pointer=non-leaf we can incorrectly skip preserving fp/r7 in the prolog.
The fix here first makes sure we're using the right frame pointer register in
the context of preserving the incoming FP, and then make sure that we save
the FP when re-alignment is known to be necessary.

rdar://162462271

When the stars align to conspire against stack alignment, when we have
frame-pointer=non-leaf we can incorrectly skip preserving fp/r7 in the prolog.
The fix here first makes sure we're using the right frame pointer register in
the context of preserving the incoming FP, and then make sure that we save
the FP when re-alignment is known to be necessary.

rdar://162462271
Copy link
Contributor Author

This stack of pull requests is managed by Graphite. Learn more about stacking.

@aemerson aemerson assigned ostannard and unassigned ostannard Oct 16, 2025
@aemerson aemerson requested a review from ostannard October 16, 2025 05:11
@aemerson aemerson marked this pull request as ready for review October 16, 2025 05:11
@llvmbot
Copy link
Member

llvmbot commented Oct 16, 2025

@llvm/pr-subscribers-backend-arm

Author: Amara Emerson (aemerson)

Changes

When the stars align to conspire against stack alignment, when we have
frame-pointer=non-leaf we can incorrectly skip preserving fp/r7 in the prolog.
The fix here first makes sure we're using the right frame pointer register in
the context of preserving the incoming FP, and then make sure that we save
the FP when re-alignment is known to be necessary.

rdar://162462271


Full diff: https://github.com/llvm/llvm-project/pull/163699.diff

2 Files Affected:

  • (modified) llvm/lib/Target/ARM/ARMFrameLowering.cpp (+6-2)
  • (added) llvm/test/CodeGen/ARM/save-fp-with-non-leaf.ll (+37)
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 138981ad92a87..a6c40e5212132 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2537,7 +2537,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   (void)TRI;  // Silence unused warning in non-assert builds.
-  Register FramePtr = RegInfo->getFrameRegister(MF);
+  Register FramePtr = STI.getFramePointerReg();
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
       STI.getPushPopSplitVariation(MF);
 
@@ -2784,7 +2784,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
       !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
     AFI->setHasStackFrame(true);
 
-    if (HasFP) {
+    // Save the FP if:
+    // 1. We currently need it (HasFP), OR
+    // 2. We might need it later due to stack realignment from aligned DPRCS2
+    //    saves (which will make hasFP() become true in emitPrologue).
+    if (HasFP || (isFPReserved(MF) && AFI->getNumAlignedDPRCS2Regs() > 0)) {
       SavedRegs.set(FramePtr);
       // If the frame pointer is required by the ABI, also spill LR so that we
       // emit a complete frame record.
diff --git a/llvm/test/CodeGen/ARM/save-fp-with-non-leaf.ll b/llvm/test/CodeGen/ARM/save-fp-with-non-leaf.ll
new file mode 100644
index 0000000000000..fefa5a0a68020
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/save-fp-with-non-leaf.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK
+target datalayout = "e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-apple-darwin"
+
+; This test checks that even with NEON register induced stack re-alignment, and
+; with the frame-pointer=non-leaf option, that we still save fp aka r7 in the
+; prolog as required.
+
+define fastcc i32 @test_save_fp() #0 {
+; CHECK-LABEL: test_save_fp:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r4, r7, lr}
+; CHECK-NEXT:    add r7, sp, #4
+; CHECK-NEXT:    sub.w r4, sp, #64
+; CHECK-NEXT:    bfc r4, #0, #4
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    vst1.64 {d8, d9, d10, d11}, [r4:128]!
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    vst1.64 {d12, d13, d14, d15}, [r4:128]
+; CHECK-NEXT:    mov r4, sp
+; CHECK-NEXT:    @ InlineAsm Start
+; CHECK-NEXT:    vld1.16 {d0, d1, d2, d3}, [r0]
+; CHECK-NEXT:    vld1.16 {d4, d5, d6, d7}, [r0]
+; CHECK-NEXT:    vabdl.s16 q4, d0, d4
+; CHECK-EMPTY:
+; CHECK-NEXT:    @ InlineAsm End
+; CHECK-NEXT:    vld1.64 {d8, d9, d10, d11}, [r4:128]!
+; CHECK-NEXT:    vld1.64 {d12, d13, d14, d15}, [r4:128]
+; CHECK-NEXT:    subs r4, r7, #4
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop {r4, r7, pc}
+  tail call void asm sideeffect "vld1.i16 {q0,q1}, [$0]\0Avld1.i16 {q2,q3}, [$1]\0Avabdl.s16 q4, d0, d4\0A", "r,r,r,~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{memory}"(ptr null, ptr null, ptr null)
+  ret i32 0
+}
+
+attributes #0 = { "frame-pointer"="non-leaf" }

@aemerson
Copy link
Contributor Author

Ping

@aemerson
Copy link
Contributor Author

ping

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants